In [3]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
import pymc3 as pm
%matplotlib inline
sns.set(font_scale=1.5)
In [4]:
theta_real = 0.35
trials = [0, 1, 2, 3, 4, 8, 16, 32, 50, 150]
data = [0, 1, 1, 1, 1, 4, 6, 9, 13, 48]
beta_params = [(1, 1), (0.5, 0.5), (20, 20)]
In [5]:
plt.figure(figsize=(10, 12))
dist = stats.beta
x = np.linspace(0, 1, 100)
for idx, N in enumerate(trials):
    if idx == 0:
        plt.subplot(4, 3, 2)
    else:
        plt.subplot(4, 3, idx + 3)
    y = data[idx]
    for (a_prior, b_prior), c in zip(beta_params, ('b', 'r', 'g')):
        p_theta_given_y = dist.pdf(x, a_prior + y, b_prior + N - y)
        plt.plot(x, p_theta_given_y, c)
        plt.fill_between(x, 0, p_theta_given_y, color=c, alpha=0.6)
    plt.axvline(theta_real, ymax=0.3, color='k')
    plt.plot(0, 0, label="{:d} experiments\n{:d} heads".format(N, y), alpha=0)
    plt.xlim(0, 1)
    plt.ylim(0, 12)
    plt.xlabel(r'$\theta$')
    plt.legend()
    plt.gca().axes.get_yaxis().set_visible(False)
plt.tight_layout()
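As a quick analytic cross-check (a sketch, not part of the run above): with a Beta(1, 1) prior, the posterior after observing y heads in N tosses is Beta(1 + y, 1 + N - y). For the last panel (N = 150, y = 48):
In [ ]:
# Sketch: analytic posterior for the last panel under the flat Beta(1, 1) prior
post = stats.beta(1 + 48, 1 + 150 - 48)
print(post.mean())            # posterior mean, close to 48/150
print(post.interval(0.95))    # central 95% credible interval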
In [6]:
def posterior_grid(grid_points=100, heads=6, tosses=9):
    """
    A grid implementation for the coin-flip problem
    """
    grid = np.linspace(0, 1, grid_points)
    prior = np.repeat(1, grid_points)
    likelihood = stats.binom.pmf(heads, tosses, grid)
    unstd_posterior = likelihood * prior
    posterior = unstd_posterior / unstd_posterior.sum()
    return grid, posterior
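Because the prior above is flat, the grid approximation can be checked against the conjugate Beta posterior. A minimal sketch (not in the original notebook):
In [ ]:
# Sketch: the grid posterior should match Beta(1 + heads, 1 + tosses - heads)
# once both are normalised over the same grid.
grid_chk, post_chk = posterior_grid(100, heads=6, tosses=9)
analytic = stats.beta.pdf(grid_chk, 1 + 6, 1 + 9 - 6)
analytic /= analytic.sum()
print(np.abs(post_chk - analytic).max())   # should be essentially zero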
In [7]:
# Assuming we made 4 tosses and observed only 1 head, we have the following:
points = 15
h, n = 1, 4
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)
Out[7]:
In [8]:
# Assuming we made 40 tosses and observed only 1 head, we have the following:
points = 15
h, n = 1, 40
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)
Out[8]:
In [9]:
# Assuming we made 40 tosses and observed 24 heads, we have the following (plotted at two grid resolutions):
points = 15
h, n = 24, 40
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)
plt.figure()
points = 150
h, n = 24, 40
grid, posterior = posterior_grid(points, h, n)
plt.plot(grid, posterior, 'o-', label='heads = {}\ntosses = {}'.format(h, n))
plt.xlabel(r'$\theta$')
plt.legend(loc=0)
Out[9]:
In [10]:
np.random.seed(123)
n_experiments = 4
theta_real = 0.35
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
print(data)
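Since the Beta prior is conjugate to the Bernoulli likelihood, the exact posterior for these four flips is available in closed form. The sketch below (not in the original) is handy for comparing against the MCMC results that follow.
In [ ]:
# Sketch: exact posterior under a Beta(1, 1) prior for the 4 observed flips
heads = data.sum()
analytic_post = stats.beta(1 + heads, 1 + len(data) - heads)
print(analytic_post.mean(), analytic_post.interval(0.95))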
In [11]:
XX = np.linspace(0,1,100)
plt.plot(XX, stats.beta(1,1).pdf(XX))
Out[11]:
In [12]:
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(1000, step=step, start=start, chains=4)
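The samples can also be summarised directly with NumPy, which makes explicit what the diagnostic plots below report. A sketch, assuming the `trace` object from the cell above:
In [ ]:
# Sketch: posterior mean and central 95% interval straight from the samples
theta_samples = trace['theta']
print(theta_samples.mean())
print(np.percentile(theta_samples, [2.5, 97.5]))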
In [13]:
burnin = 100
chain = trace[burnin:]
ax = pm.traceplot(chain, lines={'theta':theta_real});
ax[0][0].axvline(theta_real, c='r')
Out[13]:
In [14]:
theta_real
Out[14]:
In [15]:
pm.gelman_rubin(chain) # want < 1.1
Out[15]:
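For reference, a simplified (non-split) version of what `gelman_rubin` computes: compare the between-chain variance with the within-chain variance. A sketch, assuming equal-length chains:
In [ ]:
# Sketch: rough R-hat from per-chain means and variances (no rank-normalisation)
chains_theta = chain.get_values('theta', combine=False)   # one array per chain
means = np.array([c.mean() for c in chains_theta])
variances = np.array([c.var(ddof=1) for c in chains_theta])
n = len(chains_theta[0])
W = variances.mean()                     # within-chain variance
B = n * means.var(ddof=1)                # between-chain variance
var_hat = (n - 1) / n * W + B / n
print(np.sqrt(var_hat / W))              # values near 1 indicate convergence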
In [16]:
pm.forestplot(chain)
Out[16]:
In [17]:
pm.summary(trace)
Out[17]:
In [18]:
pm.autocorrplot(trace)
Out[18]:
In [19]:
# a measure of effective sample size based on autocorrelation
pm.effective_n(trace)
Out[19]:
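The idea behind `effective_n`: autocorrelated draws carry less information than independent ones, so the effective sample size is roughly N / (1 + 2 &Sigma; &rho;_t). A rough single-chain sketch (truncating at the first non-positive autocorrelation):
In [ ]:
# Sketch: crude ESS estimate for one chain from the sample autocorrelation
s = trace.get_values('theta', chains=[0])
s = s - s.mean()
acf = np.correlate(s, s, mode='full')[len(s) - 1:]
acf = acf / acf[0]
cutoff = np.argmax(acf <= 0) if np.any(acf <= 0) else len(acf)
print(len(s) / (1 + 2 * acf[1:cutoff].sum()))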
In [20]:
# AKA Kruschke plot
pm.plot_posterior(trace)
Out[20]:
In [21]:
pm.plot_posterior(trace, rope=[0.45, .55])
Out[21]:
In [22]:
pm.plot_posterior(trace, ref_val=0.50)
Out[22]:
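The same quantities can be read directly off the samples, which makes explicit what the ROPE and ref_val annotations summarise. A sketch:
In [ ]:
# Sketch: posterior probabilities behind the ROPE / ref_val annotations
theta_samples = trace['theta']
print(((theta_samples > 0.45) & (theta_samples < 0.55)).mean())   # P(theta in ROPE)
print((theta_samples < 0.50).mean())                              # P(theta < ref_val)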
In [23]:
data = stats.bernoulli.rvs(p=theta_real, size=1000) # 1000 flips in the data
In [24]:
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step, start=start, chains=4)
In [25]:
burnin = 100
chain = trace[burnin:]
ax = pm.traceplot(chain, lines={'theta':theta_real});
ax[0][0].axvline(theta_real, c='r')
Out[25]:
In [26]:
pm.gelman_rubin(chain) # want < 1.1
Out[26]:
In [27]:
pm.forestplot(chain) # much tighter credible interval with 1,000 observations
Out[27]:
In [28]:
pm.summary(trace)
Out[28]:
In [29]:
pm.autocorrplot(trace)
Out[29]:
In [30]:
pm.effective_n(trace)
Out[30]:
In [31]:
pm.plot_posterior(trace, rope=[0.45, .55])
Out[31]:
In [32]:
pm.plot_posterior(trace, ref_val=0.50)
Out[32]:
In [33]:
data = stats.bernoulli.rvs(p=theta_real, size=25) # 25 flips in the data
In [34]:
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step, start=start, chains=4)
In [35]:
burnin = 100
chain = trace[burnin:]
ax = pm.traceplot(chain, lines={'theta':theta_real});
ax[0][0].axvline(theta_real, c='r')
Out[35]:
In [36]:
pm.gelman_rubin(chain) # want < 1.1
Out[36]:
In [37]:
pm.forestplot(chain) # interval widens again with only 25 observations
Out[37]:
In [38]:
pm.summary(trace)
Out[38]:
In [39]:
pm.autocorrplot(trace)
Out[39]:
In [40]:
pm.effective_n(trace)
Out[40]:
In [41]:
pm.plot_posterior(trace, rope=[0.45, .55])
Out[41]:
In [42]:
pm.plot_posterior(trace, ref_val=0.50)
Out[42]:
In [43]:
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
Out[43]:
In [44]:
np.random.seed(123)
n_experiments = 4
theta_real = 0.35
data = stats.bernoulli.rvs(p=theta_real, size=n_experiments)
print(data)
In [48]:
with pm.Model() as our_first_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, start=start, chains=8)
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
plt.title("pm.Beta('theta', alpha=1, beta=1)")
Out[48]:
In [53]:
with pm.Model() as our_first_model:
    theta = pm.Uniform('theta', .2, .4)
    y = pm.Bernoulli('y', p=theta, observed=data)
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, chains=8)
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
plt.title("pm.Uniform('theta', 0.2, 0.4)")
Out[53]:
In [50]:
with pm.Model() as our_first_model:
    theta = pm.Normal('theta', 0.35, 1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, chains=8)
pm.plot_posterior(trace, ref_val=0.50, rope=[0.45, .55])
plt.title("pm.Normal('theta', 0.35, 1)")
Out[50]:
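Note that the Normal prior above is unbounded, so Metropolis can propose values of theta outside [0, 1], where the Bernoulli likelihood is undefined. One way to keep a Normal-shaped prior while restricting its support is `pm.Bound`; a sketch (the names `bounded_model` and `trace_bounded` are just illustrative):
In [ ]:
with pm.Model() as bounded_model:
    BoundedNormal = pm.Bound(pm.Normal, lower=0.0, upper=1.0)   # Normal restricted to [0, 1]
    theta = BoundedNormal('theta', mu=0.35, sd=1)
    y = pm.Bernoulli('y', p=theta, observed=data)
    step = pm.Metropolis()
    trace_bounded = pm.sample(5000, step=step, chains=8)
pm.plot_posterior(trace_bounded, ref_val=0.50, rope=[0.45, .55])
plt.title("Bounded Normal prior on [0, 1]")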
In [52]:
pm.plots.densityplot(trace, hpd_markers='v')
Out[52]: